rm(list=ls())
library(foreign)
library(Zelig) #requires Zelig 3.5.5 for compatibility
                #available at https://cran.r-project.org/src/contrib/Archive/Zelig/

#number of imputed datasets
m <- 10 

#delete next line
setwd("C:/Users/Martin Steinwand/Documents/Project Maritime Boundaries/Analysis/R&RJCR/ReplicationPackage")
#uncomment next line, update your path
#setwd(<Your replication directory>)

#load routines needed for generating marginal effects
source("marginaleffects.R")
#load raw data
alldat <- read.dta("Boundary Data_work.8-22-15.dta")
attach(alldat)
if(id2[2]-id2[1]!=0)cat("Data not sorted \n")
if(year[2]-year[1]!=1)cat("Data not sorted \n")
alldat$difshare <- -(alldat$difshare-100)
workdat <- data.frame(subset(alldat, ongoing2==0))
detach(alldat)

#load imputed data
load(file="BDdat8-2-12y5dum.NoOngoing.RData")

#create new variables
newdat <- transform(newdat, ddema=as.numeric(politya>=7))
newdat <- transform(newdat, ddemb=as.numeric(polityb>=7))
newdat <- transform(newdat, autoca=as.numeric(politya<=-5))
newdat <- transform(newdat, autocb=as.numeric(polityb<=-5 & polityb>=-10))
newdat <- transform(newdat, jointdem=as.numeric(ddema==1 & ddemb==1))
newdat <- transform(newdat, jointautoc=as.numeric(autoca==1 & autocb==1))
newdat <- transform(newdat, demautoc=as.numeric((ddema==1 & autocb==1) | (ddemb==1 & autoca==1)))
newdat <- transform(newdat, gdpdif=abs(gdpa2 - gdpb2)/(gdpa2 + gdpb2))
newdat <- transform(newdat, dependsingle=as.numeric((dependa==1 & dependb==0)|(dependb==1 & dependa==0)))
newdat <- transform(newdat, dependboth=as.numeric(dependa==1 & dependb==1))
newdat <- transform(newdat, coastdif=abs(coastla-coastlb)/(coastla+coastlb))
newdat <- transform(newdat, dcap=abs(capa-capb))
newdat <- transform(newdat, lcoastdif = log(as.numeric(coastdif<=0)*.001+as.numeric(coastdif>0)*coastdif))
newdat <- transform(newdat, vetotot=(vetoa + vetob)/2)
newdat <- transform(newdat, vetodif=abs(vetoa - vetob))
newdat <- transform(newdat, tsp1=workdat$tsp1)
newdat <- transform(newdat, tsp2=workdat$tsp2)
newdat <- transform(newdat, tsp3=workdat$tsp3)
newdat <- transform(newdat, totag=workdat$totag)
newdat <- transform(newdat, depend=as.numeric(dependsingle==1 | dependboth==1))
newdat <- transform(newdat, los=as.numeric(lossingle==1 | losboth ==1))
newdat <- transform(newdat, mid3y=workdat$mid3y)
newdat <- transform(newdat, mid5y=workdat$mid5y)
newdat <- transform(newdat, mid10y=workdat$mid10y)
newdat <- transform(newdat, hmid3y=workdat$hmid3y)
newdat <- transform(newdat, hmid5y=workdat$hmid5y)
newdat <- transform(newdat, hmid10y=workdat$hmid10y)
#additional transformations
newdat <- transform(newdat, difshare=workdat$difshare)
newdat <- transform(newdat, difshare.sc=(workdat$difshare*.01*(189-1)+.5)/189)#rescaled for beta model
            #rescaling according to Smithson and Verkuilen (2006)
newdat <- transform(newdat, diffrac=workdat$difshare/100) #for fractional logit
newdat <- transform(newdat, difdisc=as.numeric(workdat$difshare>=50))
newdat <- transform(newdat, difdisc2=as.numeric(workdat$difshare>0))
#create interactions
newdat <- transform(newdat, JdemOildis=jointdem * workdat$oildisc_f)
newdat <- transform(newdat, JdemOilprod=jointdem * workdat$oilprod_f)
newdat <- transform(newdat, JdemTrade=jointdem * workdat$tradetot1000_f)
newdat <- transform(newdat, JdemFish=jointdem * workdat$fishtot_f)
newdat <- transform(newdat, DcapOildis=dcap * workdat$oildisc_f)
newdat <- transform(newdat, DcapOilprod=dcap * workdat$oilprod_f)
newdat <- transform(newdat, DcapTrade=dcap * workdat$tradetot1000_f)
newdat <- transform(newdat, OilProdDisc=workdat$oildisc_f*workdat$oilprod_f)
newdat <- transform(newdat, oilone=workdat$oilone)
newdat <- transform(newdat, oiltwo=workdat$oiltwo)
newdat <- transform(newdat, oileither=workdat$oileither)#selection equation
newdat <- transform(newdat, oilproddisc=workdat$oileither * workdat$oildisc_f)#selection equation
#now add the dummies for directed oil discovery
newdat <- transform(newdat, oildiscboth = as.numeric(workdat$oildisa==1 & 
                    workdat$oildisb==1))
newdat <- transform(newdat, oildiscone = as.numeric((workdat$oildisa==1 |
                    workdat$oildisb==1) & workdat$oildisa!= workdat$oildisb))
newdat <- transform(newdat, oildiscany= oildiscone+oildiscboth)
newdat <- transform(newdat, totdif=workdat$totdif)
newdat <- transform(newdat, ltotdif=log(workdat$totdif+1))
#routine for printing results
sumres <- function(obj){
    vn <- rownames(obj)
    cat("variable\t", "coef ", "p-value \n", sep="")
    for (i in 1:nrow(obj)){
        cat(vn[i], "\t", obj[i,1], " ", 2*pnorm(-abs(obj[i,1]/obj[i,2])), "\n", sep="")
    }
}

#Obtain first-stage estimates
sform <- formula(agreement ~ jointdem + oildisc_f + oileither+ oilproddisc+
            fishtot_f + tradetot1000_f + los + totag+ dependsingle + dependboth + 
             terrdisp +  voteshare + gdpdif + dcap + tsp1 + tsp2 + tsp3 +
            y5dum1 + y5dum2+ y5dum3 + y5dum4 + y5dum5 + y5dum6 +y5dum7 + y5dum8 +y5dum9 -1)
s.mod <- zelig(sform, 
            data=newdat$imputations, model="probit", cite=FALSE)
summary(s.mod)

#indicator for dummy variables in analysis
dums <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
reps <- 10000 # no. iterations for simulated confidence bds
#calculate marginal effects & confidence bds
meff.selection.full <- meff.s(s.mod, sform, dums, newdat, reps, 12)
#counterfactual for interaction effect
meff.selection.full.int <- meff.s.int(s.mod, sform, dums, newdat, reps, 12, 4)

#Selection correction
n <- length(s.mod[[1]]$residuals)
imr <-matrix(NA, n,m)
dhat.avg<- rep(NA,m)
for(i in 1:m){
    aux.mod<-glm(sform, 
            data=newdat$imputations[[i]], binomial(link = "probit"), na.action=na.exclude)
    #compute inverse mills ratio 
    imr.mis <- dnorm(aux.mod$linear.predictors)/pnorm(aux.mod$linear.predictors)
    imr[,i] <- naresid(aux.mod$na.action, imr.mis)
    #collect ingredients for estimates of sigma and rho
    dhat.mis <- imr.mis * (imr.mis - aux.mod$linear.predictors)
    dhat.avg[i] <- sum(dhat.mis)/length(dhat.mis)
}


#obtain second stage estimates
modform <- formula(difdisc ~ imr[,j]
            +jointdem + oildisc_f + oileither + oilproddisc+ fishtot_f+tradetot1000_f + los
            +dependsingle+ dependboth + terrdisp + lcoastdif + gdpdif + dcap)
j<-1 
if (all.vars(modform[[3]])[2]=="j") k <- length(all.vars(modform[[3]]))  else k <- length(all.vars(modform[[3]])) +1
n.mod.full <- vector("list", m)
rho  <- rep(NA, m)
sterr  <- matrix(NA, k, m)
tstat <- matrix(NA, k, m)
for(j in 1:m){
    n.mod.full[[j]] <- glm(modform, data=newdat$imputations[[j]]
         ,family=binomial(link = "probit"), na.action=na.omit)
    varcov <- sandwich(n.mod.full[[j]])
    sterr[,j] <- sqrt(diag(varcov[1:k,1:k]))
    tstat[,j] <- coefficients(n.mod.full[[j]])[1:k]/sterr[,j]
    
        #obtain estimates of rho and sigma
    varre <- t(as.matrix(n.mod.full[[j]]$residuals))%*%as.matrix(n.mod.full[[j]]$residuals)/length(n.mod.full[[j]]$residuals)
    sigsqhat <- varre+dhat.avg[j]*coefficients(n.mod.full[[j]])[2]^2
    rho[j] <- sqrt(coefficients(n.mod.full[[j]])[2]^2/sigsqhat)
}
getcoef <- function(x){
    x$coefficients
}
res <- mi.meld(t(sapply(n.mod.full, getcoef)), t(sterr))
out <- t(rbind(res$q.mi, res$se.mi, res$q.mi/res$se.mi))
sumres(out)

#indicator for dummy variables
dums2 <- c(1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0)
meff.outcome.full <- meff.m(modform, n.mod.full, dums2, newdat, reps)
#counterfactual for interaction effect
meff.outcome.full.int <- meff.m.int(modform, n.mod.full, dums2, newdat, reps=10000, pos=5)


#generate plot
secmod <- meff.selection.full
outmod <- meff.outcome.full

varord <- c(1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 3, 1, 1)
#1 both, 2 selection only, 3 outcome only

varall <- c("Two Democracies",
    "Oil Discovery, (Prod=0)", "Oil Discovery, (Prod=1)", "Oil Production, (Disc=0)", "Fishery Total", 
    "Trade Total", "Law of Sea", "Previous Agreements",
    "Dependency, single", "Dependency, both", "Territorial Dispute", 
    "Joint UN Voting", "Difference Coastline", " Difference in GDP", "Relative Capabilities")

k <- length(varord)
meff.all <- array(NA, c(3,k,2))
cs <- 0 #initiate counter for skiped variables 
co <- 0
for (i in 1:k){
    if (varord[i]==1){ 
        meff.all[,i,1] <- secmod$meff90[,i-cs]
        meff.all[,i,2] <- outmod$meff90[,i-co+2]
    } else if (varord[i]==2){
        meff.all[,i,1] <- secmod$meff90[,i-cs]
        co <- co +1
    } else {
        meff.all[,i,2] <- outmod$meff90[,i-co+2]
        cs <- cs + 1
    }
}
#swap in interaction results
meff.all[,4,] <- meff.all[,3,]
meff.all[,2,1] <- meff.selection.full.int$meff90.0[,2]
meff.all[,3,1] <- meff.selection.full.int$meff90.1[,2]
meff.all[,2,2] <- meff.outcome.full.int$meff90.0[,4]
meff.all[,3,2] <- meff.outcome.full.int$meff90.1[,4]
meff.all <- meff.all*100
colnames(meff.all) <- varall

y.axis <- c(k:1)#create indicator for y.axis
par(mfrow=c(1,2))
for (p in 1:2){
    if (p==1) {
        par(mar=c(2, 8.5, 2, 0)) 
        low <- -20; hgh <- 40
    } else {
        par(mar=c(2, 3, 2, 1.5))
        low <- -80; hgh <- 80
    }
    plot(c(low, hgh), c(1,k), type = "n", 
            axes=F,
            xlab = "", 
            ylab = "", #turn off axes and labels. 
            xlim=c(low, hgh), c(1,k), xaxs = "r",
             main = c("Probability of Reaching Agreement", "Ratio of Deviations from Median Line")[p])
    segments(meff.all[1,,p], y.axis, meff.all[3,,p], y.axis)
    points(meff.all[2,,p], y.axis, pch = 19, cex=.8)
    axis(1, at = signif(seq(low, hgh,length.out=5),2), xpd=TRUE,
            tick = T,#draw x-axis and labels with tick marks
            cex.axis = .8, mgp = c(2,.5,0))#reduce label size, moves labels closer to tick marks
    if (p==1) axis(2, at = y.axis, label = varall, las = 1, tick = T, 
            cex.axis = .8) #draw y-axis with tick marks, make labels perpendicular to axis and closer to axis
    lines (c(0,0), c(0,k), lwd=1, lty=3)
}
